package us.codecraft.webmagic.scheduler;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
import us.codecraft.webmagic.utils.HttpConstant;

/***
 * Remove duplicate urls and only push urls which are not duplicate.<br>
 * *<br>
 * **
 * 
 * @author code4crafer@gmail.com
 * @since 0.5.0
 */
public abstract class DuplicateRemovedScheduler implements Scheduler {

	protected Logger logger = LoggerFactory.getLogger(getClass());

	private DuplicateRemover duplicatedRemover = new HashSetDuplicateRemover();

	public DuplicateRemover getDuplicateRemover() {
		return duplicatedRemover;
	}

	public DuplicateRemovedScheduler setDuplicateRemover(DuplicateRemover duplicatedRemover) {
		this.duplicatedRemover = duplicatedRemover;
		return this;
	}

	@Override
	public void push(Request request, Task task) {
		logger.trace("get a candidate url {}", request.getUrl());
		if (shouldReserved(request) || noNeedToRemoveDuplicate(request)
				|| !duplicatedRemover.isDuplicate(request, task)) {
			logger.debug("push to queue {}", request.getUrl());
			pushWhenNoDuplicate(request, task);
		}
	}

	protected boolean shouldReserved(Request request) {
		return request.getExtra(Request.CYCLE_TRIED_TIMES) != null;
	}

	protected boolean noNeedToRemoveDuplicate(Request request) {
		return HttpConstant.Method.POST.equalsIgnoreCase(request.getMethod());
	}

	protected void pushWhenNoDuplicate(Request request, Task task) {

	}

	@Override
	public void reset(Task task) {
		duplicatedRemover.resetDuplicateCheck(task);
	}
}
